In [1]:
import graphlab
In [2]:
# Only report WHETHER a product key is configured. Leaving the bare call as the
# cell's last expression would render the licence key itself into the saved
# notebook output, leaking it to anyone the notebook is shared with.
bool(graphlab.product_key.get_product_key())
Out[2]:
In [2]:
# Load the listening records from the SFrame directory next to this notebook.
song_data_path = 'song_data.gl/'
song_data = graphlab.SFrame(song_data_path)
In [4]:
# Peek at the first rows to see which columns the dataset provides.
song_data_preview = song_data.head()
song_data_preview
Out[4]:
In [5]:
# Send GraphLab Canvas visualisations to the 'ipynb' target
# (presumably rendering them inline in this notebook — confirm against the Canvas docs).
canvas_target = 'ipynb'
graphlab.canvas.set_target(canvas_target)
In [6]:
# Visualise the 'song' column on its own.
song_column = song_data['song']
song_column.show()
In [10]:
# Total number of rows in the dataset.
num_song_rows = len(song_data)
num_song_rows
Out[10]:
In [11]:
# Collect the distinct user ids, then report how many there are.
user_id_column = song_data['user_id']
users = user_id_column.unique()
len(users)
Out[11]:
In [12]:
# Randomly split the rows into training and test sets; the fixed seed keeps
# the split reproducible across runs.
split_fraction = 0.8
train_data, test_data = song_data.random_split(split_fraction, seed=0)
In [15]:
# Baseline recommender built with graphlab.popularity_recommender on the
# training split, keyed by user id and song title.
popularity_model = graphlab.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [16]:
# Popularity-based recommendations for the first user in `users`.
first_user = users[0]
popularity_model.recommend(users=[first_user])
Out[16]:
In [17]:
# Popularity-based recommendations for the second user in `users`.
second_user = users[1]
popularity_model.recommend(users=[second_user])
Out[17]:
In [18]:
# Personalised recommender built with graphlab.item_similarity_recommender
# on the same training split and the same user/item columns.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [19]:
# Personalised recommendations for the first user in `users`.
first_user = users[0]
personalized_model.recommend(users=[first_user])
Out[19]:
In [20]:
# Personalised recommendations for the second user in `users`.
second_user = users[1]
personalized_model.recommend(users=[second_user])
Out[20]:
In [21]:
# Ask the item-similarity model which songs are closest to a given title.
query_songs = ['With Or Without You - U2']
personalized_model.get_similar_items(query_songs)
Out[21]:
In [17]:
%matplotlib inline
In [22]:
# Evaluate both recommenders against the held-out test split.
# user_sample=0.05 presumably limits evaluation to a 5% sample of users to
# keep the comparison fast — confirm against the compare_models docs.
models_to_compare = [popularity_model, personalized_model]
model_performance = graphlab.recommender.util.compare_models(
    test_data,
    models_to_compare,
    user_sample=0.05)
In [19]:
# Re-display the first rows as a reminder of the available columns.
song_data_preview = song_data.head()
song_data_preview
Out[19]:
In [20]:
# Number of distinct users with at least one row for artist 'Kanye West'.
kanye_rows = song_data[song_data['artist'] == 'Kanye West']
kanye_listeners = kanye_rows['user_id'].unique()
len(kanye_listeners)
Out[20]:
In [21]:
# Number of distinct users with at least one row for artist 'Foo Fighters'.
foo_fighters_rows = song_data[song_data['artist'] == 'Foo Fighters']
foo_fighters_listeners = foo_fighters_rows['user_id'].unique()
len(foo_fighters_listeners)
Out[21]:
In [22]:
# Number of distinct users with at least one row for artist 'Taylor Swift'.
taylor_swift_rows = song_data[song_data['artist'] == 'Taylor Swift']
taylor_swift_listeners = taylor_swift_rows['user_id'].unique()
len(taylor_swift_listeners)
Out[22]:
In [23]:
# Number of distinct users with at least one row for artist 'Lady GaGa'.
lady_gaga_rows = song_data[song_data['artist'] == 'Lady GaGa']
lady_gaga_listeners = lady_gaga_rows['user_id'].unique()
len(lady_gaga_listeners)
Out[23]:
In [24]:
# Aggregate per artist: sum the 'listen_count' column into 'total_count'.
listen_totals = {'total_count': graphlab.aggregate.SUM('listen_count')}
artist_songs = song_data.groupby(key_columns='artist', operations=listen_totals)
In [25]:
# Display the per-artist aggregate (grouped by 'artist', with a 'total_count' column).
artist_songs
Out[25]:
In [26]:
# Order artists from highest to lowest total listen count.
sort_column = 'total_count'
artist_songs_sorted = artist_songs.sort(sort_column, ascending=False)
In [27]:
# First row of the descending sort: the artist with the largest total_count.
most_listened_artist = artist_songs_sorted[0]
most_listened_artist
Out[27]:
In [28]:
# Last row of the descending sort: the artist with the smallest total_count.
least_listened_artist = artist_songs_sorted[-1]
least_listened_artist
Out[28]:
In [29]:
# Re-create the train/test split with the same fraction and seed used earlier
# in the notebook, so this section can be run on its own.
split_fraction = 0.8
train_data, test_data = song_data.random_split(split_fraction, seed=0)
In [30]:
# Train an item-similarity recommender on the training split, keyed by
# user id and song title.
item_similarity_recommender = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [31]:
# Keep at most the first 10,000 distinct test-set users so the
# recommendation step below stays cheap.
unique_test_users = test_data['user_id'].unique()
subset_test_users = unique_test_users[:10000]
In [32]:
# Recommend exactly one song (k=1) to each sampled test user.
# Use `item_similarity_recommender`, the model trained on `train_data` in this
# section, rather than `personalized_model`, which only exists if the earlier
# part of the notebook was executed in the same kernel session.
subset_test_recommendations = item_similarity_recommender.recommend(subset_test_users, k=1)
In [33]:
# Inspect the first few recommendations.
recommendations_preview = subset_test_recommendations.head()
recommendations_preview
Out[33]:
In [34]:
# Count how many times each song appears among the recommendations.
recommendation_counts = {'num_recommendations': graphlab.aggregate.COUNT()}
recommendations_song = subset_test_recommendations.groupby(
    key_columns='song',
    operations=recommendation_counts)
In [35]:
# Display the per-song recommendation counts (grouped by 'song', with a 'num_recommendations' column).
recommendations_song
Out[35]:
In [36]:
# Order songs from most to least frequently recommended.
sort_column = 'num_recommendations'
recommendations_song_sorted = recommendations_song.sort(sort_column, ascending=False)
In [37]:
# Display the songs ordered by 'num_recommendations', most-recommended first.
recommendations_song_sorted
Out[37]:
In [ ]:
In [ ]: